We simulate a random sample of 1000 Normal\((\mu=0, \sigma=5)\) IID random variables.
# Sample size and simulated data: 1000 IID draws from Normal(mu = 0, sigma = 5).
n <- 1000
x <- rnorm(n, mean = 0, sd = 5)
We simulate a random sample of 1000 Normal\((\mu=0, \sigma=5)\) IID random variables.
# Sample size and simulated data: 1000 IID draws from Normal(mu = 0, sigma = 5).
n <- 1000
x <- rnorm(n, mean = 0, sd = 5)
The log-likelihood function can be computed with the following function:
l <- function(x, mu, sigma) {
  # Log-likelihood of IID Normal(mu, sigma) data x: the sum of log densities.
  log_densities <- dnorm(x, mean = mu, sd = sigma, log = TRUE)
  sum(log_densities)
}
We plot the log-likelihood function for \((\mu, \sigma)\) given \(\vec{x}\), saving the values in l_surface (code hidden).
# Interactive 3D surface of the log-likelihood over the (mu, sigma) grid.
# NOTE(review): assumes rows of l_surface index mu and columns index sigma
# (code that builds l_surface is hidden) -- confirm against that chunk.
library(plotly)
plot_ly(z = ~l_surface) %>%
  add_surface(x = sigma, y = mu)
We compute the MLE's \((\widehat{\mu}_{\text{MLE}}, \widehat{\sigma}_{\text{MLE}})\) for \((\mu, \sigma)\):
# MLEs for the Normal model: the sample mean and the (1/n)-denominator
# standard deviation (not the n - 1 version sd() uses).
xbar <- sum(x)/n
sigma_hat <- sqrt((1 / n) * sum((x - xbar) ^ 2))
c(xbar, sigma_hat)
## [1] 0.02254042 4.87910270
The MLE \((\widehat{\mu}_{\text{MLE}}, \widehat{\sigma}_{\text{MLE}})\) are the parameter values that best explain/fit the observed data x.
A group of students notes the production code numbers of each of their iPhones as follows:
## [1] 35827 7934 129 45236 36847 26018
Using common-sense, use this data to come up with an estimate of the total number of phones produced by Apple in this production run.
Two paradigms for visualizing a function in a computational framework:
Both require writing functions.
my_exponent <- function(a, b) {
  # Raise a to the power b (vectorized, like `^` itself).
  result <- a^b
  result
}
my_exponent(3, 2)
## [1] 9
To use this approach, first write the likelihood as an R `function()`: \[ L(p) = p^{\sum_{i=1}^n x_i}(1 - p)^{n - \sum_{i=1}^n x_i} \]
L_binom <- function(p, x) {
  # Binomial (Bernoulli-sample) likelihood of p given a 0/1 data vector x.
  # Vectorized in p, so it can be evaluated over a grid of candidate values.
  n_trials <- length(x)
  n_success <- sum(x)
  n_fail <- n_trials - n_success
  p ^ n_success * (1 - p) ^ n_fail
}
# Observed Bernoulli sample: three successes in five trials.
x <- c(1, 1, 0, 1, 0)
# Likelihood at p = 0.5: 0.5^3 * 0.5^2.
L_binom(p = .5, x = x)
## [1] 0.03125
# p = 3/5 (the sample proportion) yields a higher likelihood.
L_binom(3/5, x = x)
## [1] 0.03456
In this approach, you discretize the parameter into a fine grid of candidate values and evaluate the likelihood at each one:
# Fine grid of candidate values for p on [0, 1].
p_vec <- seq(from = 0, to = 1, by = 0.005)
head(p_vec)
## [1] 0.000 0.005 0.010 0.015 0.020 0.025
# Evaluate the likelihood at every grid value (L_binom is vectorized in p).
L_vec <- L_binom(p_vec, x)
head(L_vec)
## [1] 0.000000e+00 1.237531e-07 9.801000e-07 3.274509e-06 7.683200e-06
## [6] 1.485352e-05
# Plot the discretized likelihood curve over the grid.
df <- data.frame(x = p_vec, y = L_vec)
ggplot(df, aes(x = x, y = y)) +
  geom_line()
It's helpful to see how the likelihood responds to increasing data.
# Replicate the sample 10 times: more data concentrates the likelihood
# around the true parameter value.
x_big <- rep(x, 10)
L_vec <- L_binom(p_vec, x_big)
df <- data.frame(x = p_vec, y = L_vec)
ggplot(df, aes(x = x, y = y)) +
  geom_line()
The second method doesn't require the discretization of the function.
# stat_function() evaluates L_binom directly over the x-range,
# so no explicit grid of p values is needed.
ggplot(data.frame(x = c(0, 1)), aes(x)) +
  stat_function(fun = L_binom, args = list(x = x_big))
\[ l(\mu, \sigma) = -\frac{n}{2}\log(2\pi) - n \log (\sigma) - \frac{1}{2 \sigma^2} \sum_{i = 1}^{n} \left(x_i - \mu\right) ^2 \]
l_norm <- function(mu, sigma, x) {
  # Normal log-likelihood written out from the closed-form expression:
  #   l(mu, sigma) = -(n/2) log(2 pi) - n log(sigma)
  #                  - sum((x - mu)^2) / (2 sigma^2)
  n <- length(x)
  const_term <- -n / 2 * log(2 * pi)
  scale_term <- -n * log(sigma)
  kernel_term <- -1 / (2 * sigma^2) * sum((x - mu)^2)
  const_term + scale_term + kernel_term
}
l_norm_quick <- function(mu, sigma, x) {
  # Same Normal log-likelihood, computed via the built-in density function.
  log_dens <- dnorm(x, mean = mu, sd = sigma, log = TRUE)
  sum(log_dens)
}
# The two implementations agree at (mu = 0, sigma = 5).
l_norm(0, 5, x)
## [1] -12.70188
l_norm_quick(0, 5, x)
## [1] -12.70188
# Re-simulate a smaller sample (n = 100) from Normal(0, 5);
# this x is what the surface and MLE computations below use.
x <- rnorm(100, mean = 0, sd = 5)
In 2D, we have to define a grid of possible parameter values and compute the likelihood for each coordinate
# Define the 2D domain: 500 candidate values for each parameter.
# Spell out `length.out` -- `length = 500` relied on partial argument
# matching, which is fragile and flagged by lintr.
mu <- seq(-1.5, 1.5, length.out = 500)
sigma <- seq(4, 6, length.out = 500)
# Compute the log-likelihood at every (mu[i], sigma[j]) grid point.
# seq_along() is used instead of 1:nrow()/1:ncol(): the latter silently
# produces c(1, 0) when a dimension is zero, while seq_along() is safe.
l_surface <- matrix(0, nrow = length(mu), ncol = length(sigma))
for (i in seq_along(mu)) {
  for (j in seq_along(sigma)) {
    l_surface[i, j] <- l_norm(mu = mu[i], sigma = sigma[j], x = x)
  }
}
# Interactive 3D surface of the log-likelihood: rows of l_surface index mu
# (mapped to y) and columns index sigma (mapped to x).
library(plotly)
plot_ly(z = ~l_surface) %>%
  add_surface(x = sigma, y = mu)
We compute the MLE's \((\widehat{\mu}_{\text{MLE}}, \widehat{\sigma}_{\text{MLE}})\) for \((\mu, \sigma)\):
# Recompute the MLEs for the current (smaller) sample:
# the sample mean and the (1/n)-denominator standard deviation.
n <- length(x)
xbar <- sum(x)/n
sigma_hat <- sqrt((1 / n) * sum((x - xbar) ^ 2))
c(xbar, sigma_hat)
## [1] -0.3171497 4.3883399
# Fix the RNG seed so the sample below is reproducible.
set.seed(301)
# Draw 3 values without replacement from 1..20 (toy Uniform data).
x <- sample(1:20, 3)
L_unif <- function(beta, x) {
  # Likelihood of beta for IID Uniform(0, beta) data x.
  # The original omitted the support constraint: the density is 0 whenever
  # any observation exceeds beta, so the likelihood must be 0 for
  # beta < max(x), not (1/beta)^n. Vectorized in beta.
  n <- length(x)
  (1 / beta) ^ n * (beta >= max(x))
}
# Plot the Uniform likelihood over the full (0, 20) range.
ggplot(data.frame(x = c(0, 20)), aes(x)) +
  stat_function(fun = L_unif, args = list(x = x))
# Zoom in on the feasible region beta >= max(x), where the MLE sits
# at the left boundary beta-hat = max(x).
ggplot(data.frame(x = c(0, 20)), aes(x)) +
  stat_function(fun = L_unif, args = list(x = x)) +
  xlim(c(max(x), 20))